/**
 * Project Name:webcrawler-sourceer
 * File Name:OschinaBlog.java
 * Package Name:com.zongtui.webcrawler.sourceer.crawler.webmagic
 * Date:2015-4-29下午5:31:34
 * Copyright (c) 2015, 众推项目组版权所有.
 *
 */

package com.zongtui.webcrawler.sourceer.crawler.webmagic;

import java.util.Date;
import java.util.List;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.Formatter;
import us.codecraft.webmagic.model.annotation.TargetUrl;

/**
 * Page model for blog posts under {@code http://my.oschina.net/flashsword/blog/},
 * written to try out the webmagic annotation-driven ({@code OOSpider}) API.
 * <p>
 * Each field carries an {@link ExtractBy} expression that tells webmagic
 * which part of a page matching the {@link TargetUrl} pattern to place in it.
 * The class exposes no accessors; extracted instances are only handed to the
 * pipeline configured in {@link #main(String[])}.
 * <p>
 * Created 2015-4-29.
 *
 * @author feng
 * @since JDK 1.7
 */
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {
	/**
	 * Result of the {@code //title} expression. NOTE(review): this selects the
	 * title node itself rather than its text; confirm that is the intent.
	 */
	@ExtractBy("//title")
	private String title;

	/** Result of the CSS selector {@code div.BlogContent}. */
	@ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
	private String content;

	/**
	 * Text of the links directly inside the {@code div} whose class is
	 * {@code BlogTags}; declared multi-valued, hence a list.
	 */
	@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
	private List<String> tags;

	/**
	 * Date/time string matched by the regex applied to the {@code div} whose
	 * class is {@code BlogStat}, converted with the {@code yyyy-MM-dd HH:mm}
	 * pattern given to {@link Formatter}.
	 */
	@Formatter("yyyy-MM-dd HH:mm")
	@ExtractBy("//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')")
	private Date date;

	/**
	 * Starts a crawl at the blog index page and passes every extracted
	 * {@code OschinaBlog} to a {@link ConsolePageModelPipeline}.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		// The seed URL is registered on the spider itself: Site#addStartUrl is
		// deprecated in webmagic in favour of Spider#addUrl.
		OOSpider.create(Site.me(), new ConsolePageModelPipeline(), OschinaBlog.class)
				.addUrl("http://my.oschina.net/flashsword/blog")
				.run();
	}
}
